Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)
To run this notebook reproducibly, follow these steps:
In [ ]:
# User-editable run parameters for this notebook.
# Name of the dataset; passed on to run-prefix generation and to the count-merging step.
g_dataset_name = "Notebook6Test"
# Path of the library (constructs) definition file; expanded via ns_files.expand_path before use.
g_library_fp = '~/dual_crispr/library_definitions/test_library_2.txt'
# Comma-separated list whose entries may each be either a count file path or a directory of count files.
g_count_fps_or_dirs = '/home/ec2-user/dual_crispr/test_data/test_set_6a,/home/ec2-user/dual_crispr/test_data/test_set_6b'
# Delimited list of timepoint prefixes (presumably comma-delimited -- confirm against
# ns_string.split_delimited_string_to_list).
g_time_prefixes = "T,D"
# Run prefix for the output file; left empty here and resolved later through ns_runs.check_or_set
# (presumably replaced by a generated prefix when empty -- confirm).
g_prepped_counts_run_prefix = ""
# Directory into which the prepped counts file is written.
g_prepped_counts_dir = '~/dual_crispr/test_outputs/test_set_6'
In [ ]:
import inspect
import ccbb_pyutils.analysis_run_prefixes as ns_runs
import ccbb_pyutils.files_and_paths as ns_files
import ccbb_pyutils.notebook_logging as ns_logs
def describe_var_list(input_var_name_list):
    """Return a report with one "name: value" line per entry of input_var_name_list.

    Args:
        input_var_name_list: iterable of strings, each of which is evaluated with eval()
            to obtain the value shown next to it.

    Returns:
        A single string made of the newline-terminated "name: value" lines, in input order;
        the empty string when no names are given.
    """
    described = []
    for name in input_var_name_list:
        # NOTE: eval() executes whatever text it is handed, so only pass trusted,
        # hard-coded names to this helper.
        value = eval(name)
        described.append("{0}: {1}\n".format(name, value))
    return "".join(described)
# Set up notebook logging through ccbb_pyutils (presumably INFO-level messages to stdout,
# judging by the helper's name -- confirm against ccbb_pyutils.notebook_logging).
ns_logs.set_stdout_info_logger()
In [ ]:
# Display the source of the helper that supplies the combined-counts file suffix, which is
# used further down when collecting count files from a directory.
import dual_crispr.count_combination as ns_combine
print(inspect.getsource(ns_combine.get_combined_counts_file_suffix))
In [ ]:
# Display the source of the string-splitting helper that is later applied to g_time_prefixes.
import ccbb_pyutils.string_utils as ns_string
print(inspect.getsource(ns_string.split_delimited_string_to_list))
In [ ]:
import os
def get_count_file_fps(comma_sep_fps_or_dirs_str):
    """Turn a comma-separated string of file and/or directory paths into a flat list of file paths.

    Each comma-separated entry is stripped of surrounding whitespace and passed through
    ns_files.expand_path. An entry that is an existing directory contributes whatever
    ns_files.get_filepaths_from_wildcard returns for that directory and the combined-counts
    file suffix (presumably the matching files inside it -- confirm against ccbb_pyutils);
    any other entry is kept as-is, without checking that it exists.

    Args:
        comma_sep_fps_or_dirs_str: string of comma-separated file paths and/or directory paths.

    Returns:
        List of file paths, in the order the entries appear in the input.
    """
    count_file_fps = []
    for raw_entry in comma_sep_fps_or_dirs_str.split(","):
        expanded_entry = ns_files.expand_path(raw_entry.strip())

        if not os.path.isdir(expanded_entry):
            # Not a directory: treat the entry as a count file path in its own right.
            count_file_fps.append(expanded_entry)
            continue

        counts_suffix = ns_combine.get_combined_counts_file_suffix()
        count_file_fps.extend(ns_files.get_filepaths_from_wildcard(expanded_entry, counts_suffix))
    return count_file_fps
# Resolve the user-supplied parameters into the concrete values consumed by the later cells.
# NOTE: g_library_fp, g_prepped_counts_run_prefix and g_prepped_counts_dir are rebound in place,
# so re-running this cell operates on the already-resolved values.
g_library_fp = ns_files.expand_path(g_library_fp)
# Flatten the comma-separated files/directories setting into a list of count file paths.
g_count_file_fps = get_count_file_fps(g_count_fps_or_dirs)
# Presumably keeps a user-provided run prefix and otherwise falls back to one generated from
# the dataset name -- confirm against ccbb_pyutils.analysis_run_prefixes.
g_prepped_counts_run_prefix = ns_runs.check_or_set(g_prepped_counts_run_prefix,
ns_runs.generate_run_prefix(g_dataset_name))
g_time_prefixes_list = ns_string.split_delimited_string_to_list(g_time_prefixes)
g_prepped_counts_dir = ns_files.expand_path(g_prepped_counts_dir)
# Echo the resolved settings (the output directory is not included in this report).
print(describe_var_list(['g_library_fp', 'g_count_file_fps', 'g_prepped_counts_run_prefix', 'g_time_prefixes_list']))
# Make sure the output directory is available before anything is written to it
# (presumably created if missing, per the helper's name -- confirm).
ns_files.verify_or_make_dir(g_prepped_counts_dir)
In [ ]:
# Display the full source of the scoring-prep module whose functions perform the merge below.
import dual_crispr.scoring_prep as ns_prep
print(inspect.getsource(ns_prep))
In [ ]:
def merge_and_write_timepoint_counts(count_file_fps, constructs_fp, run_prefix, dataset_name, time_prefixes_list,
                                     output_dir, disregard_order=True):
    """Merge and annotate the count files, then write the result as a tab-separated file.

    The merged table comes from ns_prep.merge_and_annotate_counts; the output file path is
    built by ns_files.build_multipart_fp from output_dir, run_prefix and the prepped-file
    suffix reported by ns_prep.get_prepped_file_suffix.

    Args:
        count_file_fps: count file paths handed to ns_prep.merge_and_annotate_counts.
        constructs_fp: path of the library (constructs) definition file.
        run_prefix: first part of the output file name.
        dataset_name: dataset name handed to ns_prep.merge_and_annotate_counts.
        time_prefixes_list: list of timepoint prefixes handed to ns_prep.merge_and_annotate_counts.
        output_dir: directory in which the output file is written.
        disregard_order: forwarded unchanged to ns_prep.merge_and_annotate_counts
            (its exact effect is defined there). Defaults to True.

    Returns:
        None; the merged table is written to disk without its index.
    """
    # Forward the caller's disregard_order choice; previously True was hard-coded here,
    # which silently ignored the parameter.
    joined_df = ns_prep.merge_and_annotate_counts(count_file_fps, constructs_fp, dataset_name,
                                                  time_prefixes_list, disregard_order=disregard_order)
    prepped_file_suffix = ns_prep.get_prepped_file_suffix()
    output_fp = ns_files.build_multipart_fp(output_dir, [run_prefix, prepped_file_suffix])
    joined_df.to_csv(output_fp, index=False, sep='\t')
In [ ]:
# Run the merge for the resolved settings; the trailing True is the disregard_order argument.
merge_and_write_timepoint_counts(g_count_file_fps, g_library_fp, g_prepped_counts_run_prefix, g_dataset_name,
g_time_prefixes_list, g_prepped_counts_dir, True)
In [ ]:
# Print the result of checking the output directory for a file with this run prefix and the
# prepped-file suffix (presumably the failure message is used when none is found -- confirm
# against ccbb_pyutils.files_and_paths.check_file_presence).
print(ns_files.check_file_presence(g_prepped_counts_dir, g_prepped_counts_run_prefix,
ns_prep.get_prepped_file_suffix(),
check_failure_msg="Scoring preparation failed to produce an output file."))